{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5.模型的构建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    " \n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "from sklearn.linear_model import Ridge, RidgeCV, ElasticNet, LassoCV, LassoLarsCV\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from operator import itemgetter\n",
    "import itertools\n",
    "\n",
    "pd.set_option('display.float_format',lambda x:'{:.3f}'.format(x))\n",
    "\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.1 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2917, 460)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv(\"./data_change/train_afterchange.csv\")\n",
    "test = pd.read_csv(\"./data_change/test_afterchange.csv\")\n",
    "alldata = pd.concat((train.iloc[:,1:-1], test.iloc[:,1:]), ignore_index=True)\n",
    "alldata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = train.iloc[:,1:-1]\n",
    "y = train.iloc[:,-1]\n",
    "X_test = test.iloc[:,1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义验证函数\n",
    "def rmse_cv(model):\n",
    "    rmse= np.sqrt(-cross_val_score(model, X_train, y, scoring=\"neg_mean_squared_error\", cv = 5))\n",
    "    return(rmse)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.2 LassoCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.0155374655066991, tolerance: 0.015370948099408708\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.07695836129175149, tolerance: 0.015370948099408708\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.027054040649757916, tolerance: 0.011786454323548159\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.014022090872073179, tolerance: 0.012691417263757743\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.02357493856280879, tolerance: 0.013343221991451494\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.16669587001229402, tolerance: 0.013343221991451494\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.027054040649757916, tolerance: 0.011786454323548159\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.03903056820499984, tolerance: 0.012426825215761522\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.02140344498113933, tolerance: 0.012804483405193558\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.018205122124149042, tolerance: 0.012926934423586901\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.1512474334035696, tolerance: 0.01169625382041196\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.0331333833250369, tolerance: 0.01339932132243644\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n",
      "  warnings.warn(CV_WARNING, FutureWarning)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.01745084946543507, tolerance: 0.012704038451941875\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.020761165977297935, tolerance: 0.012704038451941875\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.13309861220234342, tolerance: 0.011493449210314382\n",
      "  tol, rng, random, positive)\n",
      "D:\\Python\\Anaconda\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:471: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.0331333833250369, tolerance: 0.01339932132243644\n",
      "  tol, rng, random, positive)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Lasso score: 0.1143 (0.0058)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "clf1 = LassoCV(alphas = [1, 0.1, 0.001, 0.0005,0.0003,0.0002, 5e-4])\n",
    "clf1.fit(X_train, y)\n",
    "lasso_preds = np.expm1(clf1.predict(X_test)) # exp(x) - 1  <---->log1p(x)==log(1+x)\n",
    "score1 = rmse_cv(clf1)\n",
    "print(\"\\nLasso score: {:.4f} ({:.4f})\\n\".format(score1.mean(), score1.std()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.3 ELASTIC NET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ElasticNet score: 0.1129 (0.0061)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "clf2 = ElasticNet(alpha=0.0005, l1_ratio=0.9)\n",
    "clf2.fit(X_train, y)\n",
    "elas_preds = np.expm1(clf2.predict(X_test))\n",
    " \n",
    "score2 = rmse_cv(clf2)\n",
    "print(\"\\nElasticNet score: {:.4f} ({:.4f})\\n\".format(score2.mean(), score2.std()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.4 #XGBOOST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:23:17] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\xgboost\\core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
      "  if getattr(data, 'base', None) is not None and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:23:38] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\xgboost\\core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
      "  if getattr(data, 'base', None) is not None and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:23:54] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\xgboost\\core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
      "  if getattr(data, 'base', None) is not None and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:24:08] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\xgboost\\core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
      "  if getattr(data, 'base', None) is not None and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:24:22] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Python\\Anaconda\\lib\\site-packages\\xgboost\\core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
      "  if getattr(data, 'base', None) is not None and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:24:36] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
      "\n",
      "xgb score: 0.1235 (0.0048)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "clf3=xgb.XGBRegressor(colsample_bytree=0.4,\n",
    "                 gamma=0.045,\n",
    "                 learning_rate=0.07,\n",
    "                 max_depth=20,\n",
    "                 min_child_weight=1.5,\n",
    "                 n_estimators=300,\n",
    "                 reg_alpha=0.65,\n",
    "                 reg_lambda=0.45,\n",
    "                 subsample=0.95)\n",
    " \n",
    "clf3.fit(X_train, y.values)\n",
    "xgb_preds = np.expm1(clf3.predict(X_test))\n",
    " \n",
    " \n",
    "score3 = rmse_cv(clf3)\n",
    "print(\"\\nxgb score: {:.4f} ({:.4f})\\n\".format(score3.mean(), score3.std()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.5 模型融合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_result = 0.7*lasso_preds + 0.15*xgb_preds+0.15*elas_preds #0.11327\n",
    " \n",
    "solution = pd.DataFrame({\"id\":test.index+1461, \"SalePrice\":final_result}, columns=['id', 'SalePrice'])\n",
    "solution.to_csv(\"result011621.csv\", index = False)  #"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#也曾进行过以下组合尝试，但最终效果不够优越\n",
    "# final_result1 = 0.7*lasso_preds + 0.25*xgb_preds+0.05*elas_preds \n",
    "# final_result1 = 0.7*lasso_preds + 0.1*xgb_preds+0.2*elas_preds \n",
    "# final_result1 = 0.7*lasso_preds + 0.1*xgb_preds+0.2*elas_preds \n",
    "# final_result1 = 0.7*lasso_preds + 0.1*xgb_preds+0.2*elas_preds "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
